###
### estimate heterogeneous probability of tune out by
### demographics and political alignment
### run gen_t_c_groups.R first
###

library(tidyverse)
library(parallel)
library(lubridate)
library(hms)
library(broom)
library(ggthemes)
library(data.table)
library(magrittr)
library(fixest)

### SET WORKING DIRECTORY HERE ###
# Root of the replication archive; edit this line to point at your copy.
path_to_archive <- "replication/"

# Resolve the root to an absolute path that ends in "/". The working directory
# is changed to data_dir below, yet later reads and writes are still built from
# data_dir / tables_dir; with a relative root those paths would be resolved a
# second time from inside data_dir and point at locations that do not exist.
# mustWork = TRUE stops immediately if the archive root cannot be found.
path_to_archive <- paste0(
	sub("/+$", "", normalizePath(path_to_archive, winslash = "/", mustWork = TRUE)),
	"/"
)
data_dir <- paste0(path_to_archive, "data/")
# Kept because relative paths elsewhere in the script may rely on it.
setwd(data_dir)
plot_dir <- paste0(path_to_archive, "plots/")
tables_dir <- paste0(path_to_archive, "tables/")


# Restore the objects saved in indiv_tuneout_all.RData into the workspace.
# NOTE(review): `tuneout_all` and `dates`, used further down, are presumably
# among those objects -- confirm against the script that writes this file.
load(file = paste0(data_dir, "indiv_tuneout_all.RData"))

#### merge w/ individual demo and ad-sponsor attributes ####

# Work on a data.table copy of the loaded tune-out records, rename the event
# timestamp to the column name used by the ad table, and key the table on the
# same columns that ads_subset is keyed on.
tuneout_all <- as.data.table(tuneout_all)
setnames(tuneout_all, old = "event_time_utc", new = "ad_time_utc")
setkey(tuneout_all, dma_code, channel, affiliate, program, ad_time_utc)

# by ad sponsor type
# Coerced to data.table, like every other file read in this script, so the
# `:=` update below works whatever class was stored in the .rds file.
ads <- readRDS(paste0(data_dir, "final_ads.rds")) %>% as.data.table

# fill party for pres ads, select just party / ad sponsor type variables
# Presidential ads: the Obama campaign is coded "D", any other advertiser "R".
# The trailing space inside the advertiser string is part of the comparison.
ads[ad_type == "cand_pres",
	party := ifelse(Advertiser == "OBAMA BARACK FOR PRESIDENT ", "D", "R")]

# Keep only the join columns plus sponsor party / type, keyed for the merge
# with the tune-out records.
ads_subset <- ads[, .(dma_code, channel, affiliate, program, ad_time_utc,
					  ad_party = party, ad_type)] %>%
	setkey(dma_code, channel, affiliate, program, ad_time_utc)

# load demo data 
### THIS FILE IS PROPRIETARY (FROM FWM DATA)
# Path is built from data_dir like the other inputs instead of depending on
# the current working directory.
demo <- readRDS(paste0(data_dir, "stb/stb_demo_20130131.rds")) %>%
	as.data.table %>%
	setkey(household_id)


### 
## regression of tuneout with individual FE.

# Turn `x` into a factor whose levels start with `first` (in that order),
# followed by the remaining values of `x` (existing level order for a factor,
# sorted unique values otherwise). Levels named in `first` are created even
# when they do not occur in `x`, so the leading level order does not depend on
# which values happen to be present in a given daily chunk.
relevel_first <- function(x, first) {
	rest <- if (is.factor(x)) levels(x) else sort(unique(as.character(x)))
	factor(x, levels = union(first, rest))
}

# Build the household-by-ad regression sample for one day.
#
# date: the day to process; as.character(date) must match the date suffix of
#       the daily tune-out and reference file names.
# Returns a data.table keyed on household_id with the columns tuned_out,
# household_id, ad_party, rep, dem, ad_type and days_to_election.
# Reads data_dir, demo and ads_subset from the enclosing environment.
one_day_fedata <- function(date) {
	day <- as.character(date)
	cat(day, "\n")
	tuneout <- readRDS(paste0(data_dir, "tuneout/indiv_tuneout_", day, ".rds")) %>% 
		as.data.table %>%
		setnames(old="event_time_utc", new="ad_time_utc") %>%
		setkey(device_id)

	### THESE FILES ARE PROPRIETARY (FROM FWM DATA)
	# device -> household lookup for this day
	ref <- readRDS(paste0(data_dir, "stb/ref_data/stb_ref_data_", day, ".rds")) %>% 
		as.data.table %>%
		.[,.(device_id, household_id)] %>%
		setkey(device_id)

	tuneout %>% 
		# attach household ids (inner join on device_id)
		.[ref, nomatch=0] %>%
		setkey(household_id) %>%
		# attach household demographics (inner join on household_id)
		.[demo, nomatch=0] %>%
		setkey(dma_code, channel, affiliate, program, ad_time_utc) %>%
		# attach sponsor party / type of the ad (inner join on the ad key)
		.[ads_subset, nomatch=0] %>%
		.[,`:=` (rep = replace_na(rep,0),
				 dem = replace_na(dem,0),
				 # ads without a party are coded "O", which is made the first level
				 ad_party = ad_party %>% replace_na("O") %>% relevel_first(c("O","D","R")),
				 ad_type = relevel_first(ad_type, c("outside", "cand_house", "cand_statewide", "cand_senate","cand_pres")),
				 # namespace-qualified because the function argument is also called `date`
				 days_to_election = as.integer(mdy("11/06/2012") - lubridate::date(ad_time_utc))
		)] %>%
		.[,.(tuned_out, household_id, ad_party, rep, dem, ad_type, days_to_election)] %>%
		setkey(household_id)
}

# Build the daily sample for every element of `dates` and stack the results.
# NOTE(review): `dates` is not defined in this section -- presumably it is
# created earlier (e.g. restored by load()); confirm.
fedata <- rbindlist(map(dates, one_day_fedata))


# Sort rows by household, then sponsor party, ad type and days to election.
fedata <- fedata[order(household_id, ad_party, ad_type, days_to_election)]

### TABLE 4

# One specification, three subsamples: tune-out regressed on sponsor party and
# days to election, with household fixed effects.
fe_formula <- tuned_out ~ ad_party + days_to_election | household_id

# rep == 1 households
m_tuneout_fe_rs <- feols(fe_formula, data = fedata[rep == 1])
# dem == 1 households
m_tuneout_fe_ds <- feols(fe_formula, data = fedata[dem == 1])
# households flagged as neither
m_tuneout_fe_is <- feols(fe_formula, data = fedata[rep == 0 & dem == 0])


# Display labels used by etable() for coefficients, the fixed effect and the
# dependent variable.
setFixest_dict(
	c(
		ad_partyD             = "Dem Sponsor",
		ad_partyR             = "Rep Sponsor",
		ad_typecand_house     = "House Election",
		ad_typecand_statewide = "Statewide Election",
		ad_typecand_senate    = "Senate Election",
		days_to_election      = "Days to Election",
		household_id          = "Household",
		tuned_out             = "Tuned Out"
	)
)

# LaTeX layout: \toprule / \bottomrule around the table, a \midrule before
# the coefficient, fixed-effect and fit-statistic sections, and an " FE"
# suffix on fixed-effect names.
new_style <- list(
	depvar = "title:",
	model  = "title:",
	lines  = "top:\\toprule; bottom:\\bottomrule",
	var    = "title:\\midrule",
	fixef  = "title:\\midrule; suffix: FE;where:var",
	stats  = "title:\\midrule"
)

# Session-wide etable() defaults: report R2 only, mark included fixed effects
# with a check mark, and apply the layout above.
setFixest_etable(
	fitstat = ~ r2,
	yesNo   = "$\\checkmark$",
	style   = new_style
)

# Write Table 4 to tables_dir as LaTeX. Column order is independent/unknown,
# Republican, Democratic households, matching the "Subsample" row and the
# table notes. TRUE is spelled out because `T` is an ordinary variable that
# can be reassigned.
etable(list(m_tuneout_fe_is, m_tuneout_fe_rs, m_tuneout_fe_ds),
	se = "cluster",
	title="Differential Tune-out by Sponsor Characteristics and Timing.",
	file=paste0(tables_dir, "tuneout_indiv.tex"),
	replace=TRUE,
	fixef_sizes=TRUE,
	label="tab:tuneout_indiv",
	extraline=list("Subsample"=c("Indep / Unknown", "Rep", "Dem")),
	notes="\\parbox[t]{0.7\\linewidth}{An observation is a household-ad. The sample is all households active and tuned in to the channel on which a political ad ran at the time the ad began, i.e. the treatment group from the differences-in-differences analyses. Column (1) restricts to households with independent or unknown party affiliation; column (2) restricts to Republican-identifying households, and column (3) restricts to Democratic-identifying households. The omitted category is outside group ads.}"
	  )